
/*Set up the director*/


global datadir  "~/desktop/Replication/data"
global outdir   "~/desktop/Replication/output"
global script_path "~/desktop/Replication/"



use "$datadir/deidentified.dta", clear


/* Install programs */ 
ssc install orth_out, replace
ssc install rwolf, replace
ssc install confa, replace
ssc install paran, replace



/************* TABLES & FIGURES*****************/

*** Table 1 & A1: Descriptive Statitsics and Balance 
	* Label 
	lab var treat 					"Treatment"
	lab var ch_age_month_3 			"Age in months"
	lab var ch_male 				"Male" 
	lab var ch_birth_weight_low 	"Low birthweight" 
	lab var ch_firstborn 			"First born"
	lab var hh_dibao 				"Social security support recipient" 
	lab var carer_tv_parent_3		"Mom at home"
	lab var carer_edu_high 			"Caregiver education $\ge$ 9 years" 
	lab var ch_breastfed_ever 		"Ever breastfed"
	lab var ch_breastfed_still 		"Still breastfed at 12 months" 
	lab var inv_story_3 			"Told story to baby yesterday" 
	lab var inv_book_3 				"Read book to baby yesterday" 
	lab var inv_song_3 				"Sang song to baby yesterday"
	lab var inv_toy_3 				"Played with baby yesterday"
	lab var inv_books_3 			"Number of books in household" 
	lab var mistrust_fpc_village_3 	"Unfavourable perception of FPC" 
	lab var ch_health_illdays_3		"Number of days ill past month" 
	lab var ch_anemic_3 			"Aneamia prevalence" 
	lab var bayley_mdi_3 			"Bayley MDI test score" 
	lab var bayley_pdi_3			"Bayley PDI test score" 
	lab var asq_cut_3 				"ASQ: Socio-Emotional Difficulties" 

	* Set globals: 
	global base_vars "ch_age_month_3 ch_male ch_birth_weight_low ch_firstborn ch_breastfed_ever ch_breastfed_still ch_anemic_3 ch_health_illdays_3 bayley_mdi_low_3 bayley_pdi_low_3 asq_cut_3 hh_dibao  carer_tv_parent_3 carer_edu_high  mistrust_fpc_county_3 inv_story_3 inv_book_3 inv_song_3 inv_toy_3 inv_books_3"

	* Joint test Table 1:
	probit treat $base_vars 
	testparm $base_vars
	
	*** Table 1: 
	orth_out $base_vars using "$outdir/tab1.tex" , by(treat)   vce(cluster village_id) ///
			 pcompare  se	numlabel armlabel("Control" "Treatment") title(Descriptive Statistics and Balance)  replace latex full
	
	* Joint test Table 1:
	probit treat $base_vars if attrition==0
	testparm $base_vars
	
	*** Table A1: 
	orth_out $base_vars if attrition==0 using "$outdir/Appendix/tabA1.tex" , by(treat)   vce(cluster village_id) ///
			 pcompare  se	numlabel armlabel("Control" "Treatment") title(Descriptive Statistics and Balance Adjusted for Attrition)  replace latex full

*** TABLE 2: ITT EFFECTS COGNITIVE SKILLS 
			
		eststo  clear 	
		*define sample that has measures on all cognitive subdomains
		gen sample_1=1 if wppsi_vc!=. & wppsi_vs!=. & wppsi_fr!=.& wppsi_wm!=. & wppsi_ps!=.
		
		eststo: reg wppsi_vc        treat cog_base motor_base asq_base ch_male		i.county_id if sample_1==1, cluster (village_id) 
		eststo: reg wppsi_vs        treat cog_base motor_base asq_base ch_male		i.county_id if sample_1==1, cluster (village_id) 
		eststo: reg wppsi_fr        treat cog_base motor_base asq_base ch_male		i.county_id if sample_1==1, cluster (village_id)  
		eststo: reg wppsi_wm        treat cog_base motor_base asq_base ch_male		i.county_id if sample_1==1, cluster (village_id)  
		eststo: reg wppsi_ps        treat cog_base motor_base asq_base ch_male		i.county_id if sample_1==1, cluster (village_id)  
		
		esttab using  "$outdir/table_2.tex", replace b(3) se(3) r2(2) star(* 0.1 ** 0.05 *** 0.01) booktabs label noconstant  	///
		keep(treat)	  																												///
		title(Average Treatment Effects on Cognitive Outcomes at School-Entry\label{tab2}) 										///
		nonotes addnotes(" ADD NOTES. All standard errors are clustered at the village level."	///			
						 "$* p < 0.10$, $** p < 0.05$, $*** p < 0.01$.")	

						 
        **joint significant tests
        reg wppsi_vc         treat cog_base motor_base asq_base ch_male		i.county_id if sample_1==1
        est store irt1
        reg wppsi_vs         treat cog_base motor_base asq_base ch_male		i.county_id if sample_1==1
        est store irt2
        reg wppsi_fr         treat cog_base motor_base asq_base ch_male		i.county_id if sample_1==1
        est store irt3
        reg wppsi_wm         treat cog_base motor_base asq_base ch_male		i.county_id if sample_1==1
        est store irt4
        reg wppsi_ps         treat cog_base motor_base asq_base ch_male		i.county_id if sample_1==1
        est store irt5

        suest irt1 irt2 irt3 irt4 irt5 , vce(cluster village_id)

        test [irt1_mean]treat=0
        test [irt2_mean]treat=0, accum
        test [irt3_mean]treat=0, accum
        test [irt4_mean]treat=0, accum
        test [irt5_mean]treat=0, accum

						 
		
		* FDR q-values
        preserve		
        keep if sample_1==1 
        rwolf wppsi_vc wppsi_vs   wppsi_fr   wppsi_wm wppsi_ps, ///
		indepvar(treat) method(regress) controls(cog_base motor_base asq_base ch_male i.county_id) ///
        seed(110778) reps(500) strata(county_id) cluster(village_id)  vce(cluster village_id)
        matrix D = e(RW)
        matrix P = D[1..5,1]
        svmat P
        ren P1 pval

        keep pval
        do  "$script_path/fdr_sharpened_qvalues.do"
        export excel "$outdir/FDRqvals_cog.xls", sheet(main_outcomes, replace) firstr(variables)
        restore
		
*** TABLE 3: ITT EFFECTS NON-COGNITIVE OUTCOMES
	
		eststo  clear 	
		*define sample that has measures on all non-cognitive subdomains
		gen sample_2=1 if sdq_ext!=. & sdq_int!=. & sdq_pro!=.
		
		eststo: reg sdq_ext   treat cog_base motor_base asq_base ch_male		i.county_id if sample_2==1 , cluster (village_id) 
		eststo: reg sdq_int   treat cog_base motor_base asq_base ch_male	    i.county_id if sample_2==1 , cluster (village_id) 
		eststo: reg sdq_pro   treat cog_base motor_base asq_base ch_male	    i.county_id if sample_2==1 , cluster (village_id) 
		
		esttab using  "$outdir/table_3.tex", replace b(3) se(3) r2(2) star(* 0.1 ** 0.05 *** 0.01) booktabs label noconstant  	///
		keep(treat)	  																												///
		title(Average Treatment Effects on Non-cognitive outcomes at School-Entry\label{tab3}) 										///
		nonotes addnotes(" ADD NOTES. All standard errors are clustered at the village level "  	///
						 "$* p < 0.10$, $** p < 0.05$, $*** p < 0.01$.")		   

        * FDR q-values						 
        preserve
        keep if sample_2==1						 
        rwolf sdq_ext sdq_int sdq_pro, indepvar(treat) method(regress) ///
        controls(cog_base motor_base asq_base ch_male i.county_id) ///
        seed(110778) reps(500) strata(county_id) cluster(village_id)  vce(cluster village_id)
        matrix D = e(RW)
        matrix P = D[1..3,1]
        svmat P
        ren P1 pval

        keep pval
        do  "$script_path/fdr_sharpened_qvalues.do"
        export excel "$outdir/FDRqvals_noncog.xls", sheet(main_outcomes, replace) firstr(variables)
        restore
	
	
*** TABLE 4 : ITT EFFECTS ON PARENTAL INVESTMENT 
		
		* Panel A: Parental Investment at home
		eststo clear 
		eststo: reg  time_investment        treat   cog_base motor_base asq_base  	 i.county_id  , cluster(village_id) 
		eststo: reg  material_investment    treat   cog_base motor_base asq_base  	 i.county_id  , cluster(village_id) 
		
		esttab using  "$outdir/table_4a.tex", replace b(3) se(3) r2(2) star(* 0.1 ** 0.05 *** 0.01) booktabs label noconstant  	///
		keep(treat)	  																												///
		title(Average Treatment Effects on Parental Investment at School-Entry\label{tab4a}) 										///
		nonotes addnotes(" ADD NOTES. All standard errors are clustered at the village level "  	///
						 "$* p < 0.10$, $** p < 0.05$, $*** p < 0.01$.")	

	
		* Panel B: Kindergarden Enrolment 

		eststo clear 
		eststo: reg kt_enroll 			treat cog_base motor_base asq_base i.county_id	 , cluster(village_id) 
		eststo: reg kt_start_month      treat cog_base motor_base asq_base i.county_id     , cluster(village_id)	 
		eststo: reg kt_quality		    treat cog_base motor_base asq_base i.county_id     , cluster(village_id)
		
		esttab using  "$outdir/table_4b.tex", replace b(3) se(3) r2(2) star(* 0.1 ** 0.05 *** 0.01) booktabs label noconstant  	///
		keep(treat)	  																												///
		title(Average Treatment Effects on Parental Investment at School-Entry\label{tab4b}) 										///
		nonotes addnotes(" ADD NOTES. All standard errors are clustered at the village level "  	///
						 "$* p < 0.10$, $** p < 0.05$, $*** p < 0.01$.")	
						 
		**FDR q-values (investment at home )
		preserve
		rwolf time_investment material_investment, indepvar(treat) method(regress) ///
        controls(cog_base motor_base asq_base ch_male i.county_id) ///
        seed(110778) reps(500) strata(county_id) cluster(village_id)  vce(cluster village_id)
        matrix D = e(RW)
        matrix P = D[1..2,1]
        svmat P
        ren P1 pval

        

        keep pval
        do  "$script_path/fdr_sharpened_qvalues.do"
        export excel "$outdir/FDRqvals_investment.xls", sheet(main_outcomes, replace) firstr(variables)
        restore	
		
		* Bounding excise
		eststo: reg kt_start_month_lb 	treat cog_base motor_base asq_base 	 i.county_id     , cluster(village_id) 	
		eststo: reg kt_start_month_hb 	treat cog_base motor_base asq_base 	 i.county_id   	 , cluster(village_id) 
		
        eststo: reg kt_quality_lb 		treat cog_base motor_base asq_base 	 i.county_id     , cluster(village_id)
		eststo: reg kt_quality_hb		treat cog_base motor_base asq_base 	 i.county_id     , cluster(village_id)
		

		
*** Table 5: ITT EFFECT ON PRESCHOOL LOCATION
        
		est clear
		eststo: reg township_preschool 			treat cog_base motor_base asq_base  i.county_id, cluster(village_id) 
		eststo: reg closest_enrollment          treat cog_base motor_base asq_base  i.county_id, cluster(village_id) 

		
		esttab using  "$outdir/table_5.tex", replace b(3) se(3) r2(2) star(* 0.1 ** 0.05 *** 0.01) booktabs label noconstant  	///
		keep(treat)	  																												///
		title(Average Treatment Effects on Parental Investment at School-Entry\label{tab5}) 										///
		nonotes addnotes(" ADD NOTES. All standard errors are clustered at the village level "  	///
						 "$* p < 0.10$, $** p < 0.05$, $*** p < 0.01$.")	

		**FDR q-values (preschool-related variables)
		preserve
		rwolf kt_enroll kt_start_month kt_quality township_preschool closest_enrollment, indepvar(treat) method(regress) ///
        controls(cog_base motor_base asq_base ch_male i.county_id) ///
        seed(110778) reps(500) strata(county_id) cluster(village_id)  vce(cluster village_id)
        matrix D = e(RW)
        matrix P = D[1..5,1]
        svmat P
        ren P1 pval

        

        keep pval
        do  "$script_path/fdr_sharpened_qvalues.do"
        export excel "$outdir/FDRqvals_preschool.xls", sheet(main_outcomes, replace) firstr(variables)
        restore
			
		
*** Table 6: Parens' Prefernce and Elasticity of Demands

**combine choice sets with the choice set
use "$datadir/deidentified.dta", clear
keep hhid treat 

tostring hhid,replace	
merge 1:m hhid using "$datadir/choice_set.dta"
keep if _merge==3
drop _merge

keep if distance<81326 /**only consider preschools that have distance to HH below around 81km. There is no actual enrollment above this number**/
bysort hhid:egen distance_rank=rank(distance)
bysort hhid:egen kt_enroll=total(actual_enroll)

drop if kt_enroll==0

**calculate how many preschool choice does one HH have on average
bysort hhid:gen hhid_N=_N
preserve
bysort hhid:gen hhid_one=_n
keep if hhid_one==1
sum hhid_N
restore

set more off

sum  kt_total_expense  quality distance_rank if e(sample)==1

replace quality=quality+3.284549 /**need positive value for calculation*/

est clear
eststo:clogit actual_enroll  kt_total_expense quality distance_rank ///
treat#c.kt_total_expense treat#c.quality  1.treat#c.distance_rank, group(hhid) vce(cluster hhid)

esttab using "$outdir/table6.tex", replace b(3) se(3) r2(2) star(* 0.1 ** 0.05 *** 0.01) booktabs label  noconstant

est clear
eststo:clogit actual_enroll  0.treat#c.kt_total_expense 0.treat#c.quality 0.treat#c.distance_rank ///
1.treat#c.kt_total_expense 1.treat#c.quality  1.treat#c.distance_rank, group(hhid) vce(cluster hhid)

esttab using "$outdir/table6.tex", append b(3) se(3) r2(2) star(* 0.1 ** 0.05 *** 0.01) booktabs label  noconstant

sum  kt_total_expense  quality distance_rank if e(sample)==1

bootstrap elasticity_1=(_b[0.treat#c.kt_total_expense]*1.17*0.97) elasticity_2=(_b[1.treat#c.kt_total_expense]*1.17*0.97) ///
elasticity_3=(_b[0.treat#c.quality]*3.12*0.97) elasticity_4=(_b[1.treat#c.quality]*3.12*0.97) ///
elasticity_5=(_b[0.treat#c.distance_rank]*18.65*0.97) elasticity_6=(_b[1.treat#c.distance_rank]*18.65*0.97) ///
, reps(200): clogit actual_enroll  0.treat#c.kt_total_expense 0.treat#c.quality 0.treat#c.distance_rank ///
1.treat#c.kt_total_expense 1.treat#c.quality  1.treat#c.distance_rank, group(hhid) vce(cluster hhid)

/**figures**/

    *** Figure 2
	*** COGNITIVE SKILLS WIPPSI *** 
	use "$datadir/deidentified.dta", clear 
	
	
	* test equality of distribution 
	foreach var of varlist wppsi_vc wppsi_vs wppsi_fr wppsi_wm wppsi_ps { 
			ksmirnov `var', by(treat) 
			}
			
			
	* label 
	lab var wppsi_vc    "Verbal Comprehension"
	lab var wppsi_vs 	"Visual Spatial"
	lab var wppsi_fr 	"Fluid Reasoning"
	lab var wppsi_wm 	"Working Memory"
	lab var wppsi_ps 	"Processing Speed"

			
	* drop outliers 
	foreach var of varlist wppsi_vc wppsi_vs wppsi_fr wppsi_wm wppsi_ps { 
			drop if `var' < -3 
			drop if `var' > 3
			}
			
	cdfplot wppsi_vc ,by(treat) legend(pos(6) label(2 "Treatment") label(1 "Control")) xsize(6) xlabel(-3 0 3)   plotregion(color(white)) graphregion(color(white))  xtitle("") scheme(s2mono)
		graph export "$outdir/figure2a.png",replace 
	cdfplot wppsi_vs ,by(treat) legend(pos(6) label(2 "Treatment") label(1 "Control")) xsize(6) xlabel(-3 0 3)   plotregion(color(white)) graphregion(color(white))  xtitle("") scheme(s2mono)
		graph export "$outdir/figure2b.png",replace 
	cdfplot wppsi_fr ,by(treat) legend(pos(6) label(2 "Treatment") label(1 "Control")) xsize(6) xlabel(-3 0 3)   plotregion(color(white)) graphregion(color(white))  xtitle("") scheme(s2mono)
		graph export "$outdir/figure2c.png",replace 
	cdfplot wppsi_ps ,by(treat) legend(pos(6) label(2 "Treatment") label(1 "Control")) xsize(6) xlabel(-3 0 3)   plotregion(color(white)) graphregion(color(white))  xtitle("") scheme(s2mono)
		graph export "$outdir/figure2d.png",replace 
	cdfplot wppsi_wm ,by(treat) legend(pos(6) label(2 "Treatment") label(1 "Control")) xsize(6) xlabel(-3 0 3)   plotregion(color(white)) graphregion(color(white))  xtitle("") scheme(s2mono) 
		graph export "$outdir/figure2e.png",replace 
	
	
	
	
		
	*** Figure 3	
	*** NON-COGNITIVE SKILLS WIPPSI *** 
	use "$datadir/deidentified.dta", clear 
	drop if sdq_ext<-3 | sdq_ext>3

	
	cdfplot sdq_ext ,by(treat) legend(pos(6) label(2 "Treatment") label(1 "Control")) xsize(6) xlabel(-3 0 3)   plotregion(color(white)) graphregion(color(white)) xtitle("") scheme(s2mono)
		graph export "$outdir/figure3a.png",replace 
	
	use "$datadir/deidentified.dta", clear 
	drop if sdq_int<-3 | sdq_int>3

	cdfplot sdq_int ,by(treat) legend(pos(6) label(2 "Treatment") label(1 "Control")) xsize(6) xlabel(-3 0 3)   plotregion(color(white)) graphregion(color(white))  xtitle("") scheme(s2mono)
		graph export "$outdir/figure3b.png",replace 
	
	use "$datadir/deidentified.dta", clear 
	drop if sdq_pro<-3 | sdq_pro>3
	
	cdfplot sdq_pro ,by(treat) legend(pos(6) label(2 "Treatment") label(1 "Control")) xsize(6) xlabel(-3 0 3)   plotregion(color(white)) graphregion(color(white))  xtitle("") scheme(s2mono)
		graph export "$outdir/figure3c.png",replace 
	

	***Figure 4	
	*** PRESCHOOL *** 
	use "$datadir/deidentified.dta", clear 

	label define treat 0 "Control village" 1 "Treatment village"
	label value treat treat 

	label define kt_enroll_town 0 "Village" 1 "Township"
	label value township_preschool kt_ennroll_town
	
	label var kt_enroll      "Enrollment Preschool" 
	label var township_preschool  "Enrollment Township Preschool"


	* Preschool enrolment 
	cibar kt_enroll, over(treat) level(90) graphopts( ytitle("Enrollment Preschool")  plotregion(color(white)) graphregion(color(white)) scheme(s2mono)) barlabel(treat) 
	graph export "$outdir/figure4a.png",replace
	
	*Preschool township enrolment 
	cibar township_preschool, over(treat) level(90) graphopts( ytitle("Enrollment Township Preschool") plotregion(color(white)) graphregion(color(white)) scheme(s2mono)) barlabel(treat) 
	graph export "$outdir/figure4b.png",replace
	
	* Age preschool enrolment 
	
	cdfplot kt_start_month_hb if kt_enroll==1 ,by(treat) legend(pos(6) label(2 "Treatment") label(1 "Control"))   plotregion(color(white)) graphregion(color(white))  xtitle("") scheme(s2mono)
		graph export "$outdir/figure4c.png",replace 	

	* preschool quality 
	
	cdfplot kt_quality  ,by(treat) legend(pos(6) label(2 "Treatment") label(1 "Control"))   plotregion(color(white)) graphregion(color(white))  xtitle("") scheme(s2mono)
		graph export "$outdir/figure4d.png",replace 	

	
/*************APPENDIX TABLES & FIGURES*****************/

********Appendix A
   
   ***the codes for the Appendix Table A1 can be found above along with Table 1 
	
   ***Appendix Table A2
   *Preschool abd Teacher Characterisrics
		
   global KT_characters "kt_size kt_subsidy kt_tuition kt_teacher_age	kt_teacher_male kt_teacher_exp kt_teacher_salary kt_teacher_bachelor kt_teacher_training kt_ptr kt_totalroom kt_outdoor  kt_playroom kt_exersiceroom kt_dormitories kt_breakfast kt_readbooks kt_exercise kt_art kt_science kt_social kt_language" 
   
   
   *** Table A2: 
   preserve 
   bysort preschool_id:gen preschool_one=_n
   keep if preschool_one==1
   count if preschool_location==0
   count if preschool_location==1
   orth_out $KT_characters using "$outdir/Appendix/tabA2.tex" , by(preschool_location)    vce(cluster village_id) ///
			 pcompare  se	numlabel armlabel("Village" "Town") title(Preschool and Teacher Characteristics)  replace latex full
   restore		 

   
********Appendix B1

/*------------------------------------------------------------------------------	
				ITEM RESPONSE THEORY - WPPSI TESTS 
				
The IRT scores of WPPSI scales are generated with a sample of 1542 children surveyed at the same time.
Among them, 472 children are the sample of interest of the study			
-------------------------------------------------------------------------------*/
use "$datadir/wppsi_deidenified",clear

	* define global subscales 
	global WPPSI_1  "wppsi_1_1  - wppsi_1_17" 
	global WPPSI_2  "wppsi_2_1  - wppsi_2_29" 
	global WPPSI_3  "wppsi_3_1  - wppsi_3_26" 
	global WPPSI_4  "wppsi_4_1" 
	global WPPSI_5  "wppsi_5_1  - wppsi_5_35" 
	global WPPSI_6  "wppsi_6_1  - wppsi_6_23" 
	global WPPSI_7  "wppsi_7_1  - wppsi_7_27" 
	global WPPSI_8  "wppsi_8_1  - wppsi_8_2" 
	global WPPSI_9  "wppsi_9_1  - wppsi_9_20" 
	global WPPSI_10 "wppsi_10_1 - wppsi_10_12" 
	global WPPSI_11 "wppsi_11_1" 
	global WPPSI_12 "wppsi_12_1 - wppsi_12_31" 
	global WPPSI_13 "wppsi_13_1 - wppsi_13_24" 
	
	sum  $WPPSI_1 
	sum  $WPPSI_2  
	sum  $WPPSI_3  
	sum  $WPPSI_4 
	sum  $WPPSI_5  
	sum  $WPPSI_6  
	sum  $WPPSI_7  
	sum  $WPPSI_8 
	sum  $WPPSI_9 
	sum  $WPPSI_10 
	sum  $WPPSI_11
	sum  $WPPSI_12
	sum  $WPPSI_13 
	
/*-------------------------------*/
/*   VERBAL COMPREHENSION 		 */	
/*-------------------------------*/	

* pca 
	pca $WPPSI_2
	
	* Horns Test
	paran $WPPSI_2
	
	* drop items with  zero variation
	drop wppsi_2_29 
	
* estimate model 
	irt 2pl wppsi_2_* 
	estat report ,  byparm 
	
	matrix results_2=r(table)
	
	matrix coff_2=results_2[1,29..56]
	matrix ll_2=results_2[5,29..56]
	matrix ul_2=results_2[6,29..56]
	
	matrix diff_2=(coff_2\ll_2\ul_2)
	matrix diff_2=diff_2'
	
	preserve
	clear
	svmat diff_2
	rename diff_21 diff
	rename diff_22 lci
	rename diff_23 uci
	gen N =_n		
	graph twoway (bar diff N ) (rcap lci uci N), legend(label(1 "point estimate") label(2 "lower CI/upper CI")) graphregion(color(white))
	
	graph export "$outdir/Appendix/FigureB11a_dif_information.png",replace
	restore
		
	* item characteristic curve (icc) 
	irtgraph icc , legend(off) 	xtitle(Latent Skill) ytitle(Probability of correct response) title("") graphregion(color(white))
	graph export "$outdir/Appendix/FigureB12a_icc_information.png" ,replace
	

*** WPPSI 6 Similarities Test *** 

	* pca 
	pca $WPPSI_6
	
	* Horns Test
	paran $WPPSI_6
	
	* make items binary 
	foreach var of varlist wppsi_6_* {
			replace `var'=1 if `var'==2 
			}
	sum $WPPSI_6
	
	* estimate model 
	irt 2pl $WPPSI_6
	estat report , byparm 	
	
	matrix results_6=r(table)
	
	matrix coff_6=results_6[1,24..46]
	matrix ll_6=results_6[5,24..46]
	matrix ul_6=results_6[6,24..46]
	
	matrix diff_6=(coff_6\ll_6\ul_6)
	matrix diff_6=diff_6'
	preserve
	clear
	svmat diff_6
	rename diff_61 diff
	rename diff_62 lci
	rename diff_63 uci
	gen N =_n		
	graph twoway (bar diff N ) (rcap lci uci N), legend(label(1 "point estimate") label(2 "lower CI/upper CI")) graphregion(color(white))
	
	graph export "$outdir/Appendix/FigureB11b_dif_similarity.png" ,replace
	restore
		
	* item characteristic curve (icc) 
	irtgraph icc , legend(off) 	xtitle(Latent Skill) ytitle(Probability of correct response) title("") graphregion(color(white))
	graph export "$outdir/Appendix/FigureB12b_icc_similarity.png" ,replace
	

/*------------------------*/
/*  VISUAL SPATIAL        */	
/*------------------------*/	




*** WPPSI 1 BLOCK DESIGN *** 

	* pca 
	pca $WPPSI_1 
	
	* Horns Test
	paran  $WPPSI_1 
	
	

	* estimate model 
	irt 2pl  $WPPSI_1 
	estat report , byparm 	
	
	matrix results_1=r(table)
	
	matrix coff_1=results_1[1,18..34]
	matrix ll_1=results_1[5,18..34]
	matrix ul_1=results_1[6,18..34]
	
	matrix diff_1=(coff_1\ll_1\ul_1)
	matrix diff_1=diff_1'
	preserve
	clear
	svmat diff_1
	rename diff_11 diff
	rename diff_12 lci
	rename diff_13 uci
	gen N =_n		
	graph twoway (bar diff N ) (rcap lci uci N), legend(label(1 "point estimate") label(2 "lower CI/upper CI")) graphregion(color(white))
	
	graph export "$outdir/Appendix/FigureB21a_dif_block.png" ,replace
	restore
		
	* item characteristic curve (icc) 
	irtgraph icc , legend(off) 	xtitle(Latent Skill) ytitle(Probability of correct response) title("") graphregion(color(white))
	graph export "$outdir/Appendix/FigureB22a_icc_block.png" ,replace

*** WPPSI 10 OBJECT ASSEMBLY *** 

	* pca 
	pca $WPPSI_10
	
	
	* Horns Test
	paran $WPPSI_10
	
	* make items binary 
	foreach var of varlist wppsi_10_* {
			replace `var'=1 if `var'==2 
			replace `var'=1 if `var'==3 
			replace `var'=1 if `var'==4
			replace `var'=1 if `var'==5
			}

	* estimate model 
	irt 2pl  $WPPSI_10
	estat report , byparm 	
	
	matrix results_10=r(table)
	
	matrix coff_10=results_10[1,13..24]
	matrix ll_10=results_10[5,13..24]
	matrix ul_10=results_10[6,13..24]
	
	matrix diff_10=(coff_10\ll_10\ul_10)
	matrix diff_10=diff_10'
	preserve
	clear
	svmat diff_10
	rename diff_101 diff
	rename diff_102 lci
	rename diff_103 uci
	gen N =_n		
	graph twoway (bar diff N ) (rcap lci uci N), legend(label(1 "point estimate") label(2 "lower CI/upper CI")) graphregion(color(white))
	
	graph export "$outdir/Appendix/FigureB21b_dif_object.png" ,replace
	restore
		
	* item characteristic curve (icc) 
	irtgraph icc , legend(off) 	xtitle(Latent Skill) ytitle(Probability of correct response) title("") graphregion(color(white))
	graph export "$outdir/Appendix/FigureB22b_icc_object.png" ,replace



	
	
	
/*-------------------------*/
/*   FLUID REASONING       */	
/*-------------------------*/

*** WPPSI 3 Matrix Reasoning *** 

	* pca 
	pca $WPPSI_3
	
	
	* Horns Test
	paran $WPPSI_3
	

	* estimate model 
	irt 2pl  $WPPSI_3
	estat report , byparm 	
	
	matrix results_3=r(table)
	
	matrix coff_3=results_3[1,27..52]
	matrix ll_3=results_3[5,27..52]
	matrix ul_3=results_3[6,27..52]
	
	matrix diff_3=(coff_3\ll_3\ul_3)
	matrix diff_3=diff_3'
	preserve
	clear
	svmat diff_3
	rename diff_31 diff
	rename diff_32 lci
	rename diff_33 uci
	gen N =_n		
	graph twoway (bar diff N ) (rcap lci uci N), legend(label(1 "point estimate") label(2 "lower CI/upper CI")) graphregion(color(white))
	
	graph export "$outdir/Appendix/FigureB31a_dif_matrix.png" ,replace
	restore
		
	* item characteristic curve (icc) 
	irtgraph icc , legend(off) 	xtitle(Latent Skill) ytitle(Probability of correct response) title("") graphregion(color(white))
	graph export "$outdir/Appendix/FigureB32a_icc_matrix.png" ,replace


*** WPPSI 7 Picture concepts *** 

	* pca 
	pca $WPPSI_7
	
	* Horns Test
	paran $WPPSI_7
	
	* take out items with no variation 
	sum $WPPSI_7 
	drop wppsi_7_27
	
	* estimate model 
	irt 2pl  wppsi_7_1  - wppsi_7_26
	estat report , byparm 	
	
	matrix results_7=r(table)
	
	matrix coff_7=results_7[1,27..52]
	matrix ll_7=results_7[5,27..52]
	matrix ul_7=results_7[6,27..52]
	
	matrix diff_7=(coff_7\ll_7\ul_7)
	matrix diff_7=diff_7'
	preserve
	clear
	svmat diff_7
	rename diff_71 diff
	rename diff_72 lci
	rename diff_73 uci
	gen N =_n		
	graph twoway (bar diff N ) (rcap lci uci N), legend(label(1 "point estimate") label(2 "lower CI/upper CI")) graphregion(color(white))
	
	graph export "$outdir/Appendix/FigureB31b_dif_picture_concept.png" ,replace
	restore
		
	* item characteristic curve (icc) 
	irtgraph icc , legend(off) 	xtitle(Latent Skill) ytitle(Probability of correct response) title("") graphregion(color(white))
	graph export "$outdir/Appendix/FigureB32b_icc_picture_concept.png" ,replace




/*-------------------------*/
/*    WORKING MEMORY       */	
/*-------------------------*/

*** WPPSI 5 PICTURE MEMORY *** 

	* pca 
	pca $WPPSI_5 
	
	* Horns Test
	paran $WPPSI_5 
	
	* take out items witout variation 
	sum $WPPSI_5 
	drop  wppsi_5_32  - wppsi_5_35 
	
	
	* estimate model 
	irt 2pl wppsi_5_1 -  wppsi_5_31 
	estat report , byparm 	
	
	matrix results_5=r(table)
	
	matrix coff_5=results_5[1,32..62]
	matrix ll_5=results_5[5,32..62]
	matrix ul_5=results_5[6,32..62]
	
	matrix diff_5=(coff_5\ll_5\ul_5)
	matrix diff_5=diff_5'
	preserve
	clear
	svmat diff_5
	rename diff_51 diff
	rename diff_52 lci
	rename diff_53 uci
	gen N =_n		
	graph twoway (bar diff N ) (rcap lci uci N), legend(label(1 "point estimate") label(2 "lower CI/upper CI")) graphregion(color(white))
	
	graph export "$outdir/Appendix/FigureB41b_dif_picture_memory.png" ,replace
	restore
		
	* item characteristic curve (icc) 
	irtgraph icc , legend(off) 	xtitle(Latent Skill) ytitle(Probability of correct response) title("") graphregion(color(white))
	graph export "$outdir/Appendix/FigureB42b_icc_picture_memory.png" ,replace
	
	

*** WPPSI 9  ZOO LOCATIONS *** 

	* pca 
	pca $WPPSI_9 
	
	* Horns Test
	paran $WPPSI_9 
	
	* take out items witout variation 
	sum $WPPSI_9 
	drop  wppsi_9_18 - wppsi_9_20 
	
	
	* estimate model 
	irt 2pl wppsi_9_1 - wppsi_9_17
	estat report , byparm 	
	
	matrix results_9=r(table)
	
	matrix coff_9=results_9[1,18..34]
	matrix ll_9=results_9[5,18..34]
	matrix ul_9=results_9[6,18..34]
	
	matrix diff_9=(coff_9\ll_9\ul_9)
	matrix diff_9=diff_9'
	preserve
	clear
	svmat diff_9
	rename diff_91 diff
	rename diff_92 lci
	rename diff_93 uci
	gen N =_n		
	graph twoway (bar diff N ) (rcap lci uci N), legend(label(1 "point estimate") label(2 "lower CI/upper CI")) graphregion(color(white))
	
	graph export "$outdir/Appendix/FigureB41a_dif_zoo.png" ,replace
	restore
		
	* item characteristic curve (icc) 
	irtgraph icc , legend(off) 	xtitle(Latent Skill) ytitle(Probability of correct response) title("") graphregion(color(white))
	graph export "$outdir/Appendix/FigureB42a_icc_zoo.png" ,replace
	
    /****generate latent skills***/
	/**the outcome of interests in Table 2 is generated with the following process**/
    
	irt 2pl wppsi_1_* 
	predict irt_wppsi_1, latent 

	irt 2pl wppsi_2_* 
	predict irt_wppsi_2 , latent 
	
	irt 2pl wppsi_3_* 
	predict irt_wppsi_3, latent 

	irt 2pl wppsi_5_* 
	predict irt_wppsi_5, latent 

	irt 2pl wppsi_6_* 
	predict irt_wppsi_6, latent 
	
	irt 2pl wppsi_7_* 
	predict irt_wppsi_7, latent 
	
	irt 2pl wppsi_9_* 
	predict irt_wppsi_9, latent
	
	irt 2pl wppsi_10_* 
	predict irt_wppsi_10, latent
		
    destring hhid,replace
    merge 1:1 hhid using "$datadir/deidentified.dta"

    /* IRT BASED AGGREGATE SCORES */ 
	keep if _merge!=1
		
	* 1. VERBAL COMPHREHENSION [information (2.1 - 2.29) + similarities (6.1 - 6.23)] 
	egen irt_wppsi_2_rank  = rank(irt_wppsi_2) 
	egen irt_wppsi_6_rank  = rank(irt_wppsi_6) 
	gen  irt_wppsi_vc_rank = (irt_wppsi_2_rank + irt_wppsi_6_rank)/2 
	
		
	* 2. VISUAL SPATIAL [block design (1.1 - 1.17)  + object assembly (10.1 - 10.12)]
	egen irt_wppsi_1_rank  = rank(irt_wppsi_1) 
	egen irt_wppsi_10_rank = rank(irt_wppsi_10) 
	gen  irt_wppsi_vs_rank = (irt_wppsi_1_rank + irt_wppsi_10_rank)/2 
	
	
	* 3. FLUID REASONING [matrix reasoning (3.1 - 3.26)  + picture concepts (7.1 - 7.27)]
	egen irt_wppsi_3_rank  = rank(irt_wppsi_3) 
	egen irt_wppsi_7_rank  = rank(irt_wppsi_7) 
	gen  irt_wppsi_fr_rank = (irt_wppsi_3_rank + irt_wppsi_7_rank)/2 
		
	
	* 4. WORKING MEMORY [picture memory (5.1 - 5.35) + zoo locations (9.1 - 9.20)]	
	egen irt_wppsi_5_rank  = rank(irt_wppsi_5) 
	egen irt_wppsi_9_rank  = rank(irt_wppsi_9) 
	gen  irt_wppsi_wm_rank = (irt_wppsi_5_rank + irt_wppsi_9_rank)/2 
	
	
	* 5. PROCESSING SPEED [bug search (4 )  + cancelations (8.1 & 8.2)] - no IRT measures because continuous measures - 
	egen irt_wppsi_4_rank   = rank(wppsi_4_1) 
	egen irt_wppsi_8_a_rank = rank(wppsi_8_1) 
	egen irt_wppsi_8_b_rank = rank(wppsi_8_2) 
	gen  irt_wppsi_ps_rank  = (irt_wppsi_4_rank + irt_wppsi_8_a_rank + irt_wppsi_8_b_rank)/3
	
			
	
	* standardize by agemonth group 
	cap drop wppsi_vc wppsi_vs wppsi_fr wppsi_wm wppsi_ps  
	foreach var of varlist irt_wppsi_vc_rank irt_wppsi_vs_rank irt_wppsi_fr_rank irt_wppsi_wm_rank ///
	irt_wppsi_ps_rank {		
		cap drop `var'_st 
		cap drop  r r2 mean mean 
		lpoly `var' age, at (age) gen(mean) 
		g r  = `var'-mean 
		g r2 = r^2 
		
		cap drop var std 
		lpoly r2 age , at (age) gen(var) 
		g std=sqrt(var) 
		g `var'_st = (`var' - mean)/std
		}


	drop r r2 var mean std
	
	rename irt_*_rank_st *
    
	****Appendix A figures**
	
	cdfplot wppsi_2 ,by(treat) legend(pos(6) label(2 "Treatment") label(1 "Control")) xsize(6) xlabel(-3 0 3)   plotregion(color(white)) graphregion(color(white))  xtitle("") scheme(s2mono)
	graph export "$outdir/Appendix/figureA1a.png",replace 
	cdfplot wppsi_6 ,by(treat) legend(pos(6) label(2 "Treatment") label(1 "Control")) xsize(6) xlabel(-3 0 3)   plotregion(color(white)) graphregion(color(white))  xtitle("") scheme(s2mono)
	graph export "$outdir/Appendix/figureA1b.png",replace 
	cdfplot wppsi_1 ,by(treat) legend(pos(6) label(2 "Treatment") label(1 "Control")) xsize(6) xlabel(-3 0 3)   plotregion(color(white)) graphregion(color(white))  xtitle("") scheme(s2mono)
	graph export "$outdir/Appendix/figureA2a.png",replace 
	cdfplot wppsi_10 ,by(treat) legend(pos(6) label(2 "Treatment") label(1 "Control")) xsize(6) xlabel(-3 0 3)   plotregion(color(white)) graphregion(color(white))  xtitle("") scheme(s2mono)
	graph export "$outdir/Appendix/figureA2b.png",replace 
	cdfplot wppsi_3 ,by(treat) legend(pos(6) label(2 "Treatment") label(1 "Control")) xsize(6) xlabel(-3 0 3)   plotregion(color(white)) graphregion(color(white))  xtitle("") scheme(s2mono) 
	graph export "$outdir/Appendix/figureA3a.png",replace 
	cdfplot wppsi_7 ,by(treat) legend(pos(6) label(2 "Treatment") label(1 "Control")) xsize(6) xlabel(-3 0 3)   plotregion(color(white)) graphregion(color(white))  xtitle("") scheme(s2mono) 
	graph export "$outdir/Appendix/figureA3b.png",replace 
	cdfplot wppsi_9 ,by(treat) legend(pos(6) label(2 "Treatment") label(1 "Control")) xsize(6) xlabel(-3 0 3)   plotregion(color(white)) graphregion(color(white))  xtitle("") scheme(s2mono) 
	graph export "$outdir/Appendix/figureA4a.png",replace 
	cdfplot wppsi_5 ,by(treat) legend(pos(6) label(2 "Treatment") label(1 "Control")) xsize(6) xlabel(-3 0 3)   plotregion(color(white)) graphregion(color(white))  xtitle("") scheme(s2mono) 
	graph export "$outdir/Appendix/figureA4b.png",replace 
	
	****Appendix B.2.
	*** SDQ assessment by parents *** 
 	 
	* global 
	global SDQ_PRO_P  "sdqp6  sdqp9  sdqp14 sdqp22 sdqp25" 
	global SDQ_EXT_P  "sdqp10 sdqp12 sdqp17 sdqp23 sdqp27 sdqp7  sdqp15 sdqp20 sdqp26 sdqp30"
	global SDQ_INT_P  "sdqp8  sdqp13 sdqp18 sdqp21 sdqp29 sdqp11 sdqp16 sdqp19 sdqp24 sdqp28" 

	
	*CFA scores 
	***Table B1
	confa( ext:  sdqp10 sdqp12 sdqp17 sdqp23 sdqp27 sdqp7  sdqp15 sdqp20 sdqp26 sdqp30) ///
	(int:  sdqp8  sdqp13 sdqp18 sdqp21 sdqp29 sdqp11 sdqp16 sdqp19 sdqp24 sdqp28) ///
	(pro: sdqp6  sdqp9 sdqp14 sdqp22 sdqp25), from(iv) 
	predict cfac_sdq_ext_p cfac_sdq_int_p  cfac_sdq_pro_p, bartlett  
	
	* standardize by age-month 
	cap drop sdq_ext sdq_int  sdq_pro
	foreach var of varlist cfac_sdq_* {		
		cap drop `var'_st 
		cap drop  r r2 mean mean 
		lpoly `var' age, at (age) gen(mean) 
		g r  = `var'-mean 
		g r2 = r^2 
		
		cap drop var std 
		lpoly r2 age , at (age) gen(var) 
		g std=sqrt(var) 
		g `var'_st = (`var' - mean)/std
		}

	drop r r2 var mean std	
	
	rename cfac_*_p_st *
	
***Appendix B.3.
***Table B2
		global INPUT_TIME  "inv_toy_5 inv_story_5 inv_song_5 inv_tv_5 " 
		global INPUT_COST  "inv_cost_books inv_cost_toys inv_cost_cloth inv_cost_school"	

			
		*Table B2 EFA 
		paran $INPUT_TIME 
		pca $INPUT_TIME 
		scree,ci graphregion(color(white))
		graph export "$outdir/Appendix/FigureB5a.png" ,replace
		
		paran $INPUT_COST
		pca $INPUT_COST
		scree,ci graphregion(color(white))
		graph export "$outdir/Appendix/FigureB5b.png" ,replace
		
		*Table B3 Factor Analysis 
		factor inv_toy_5 inv_story_5 inv_song_5 inv_tv_5,fa(1)
		predict fa_inv_time
		
		factor inv_cost_books inv_cost_toys inv_cost_cloth inv_cost_school,fa(1)
		predict fa_inv_mat
		

		* standardize individual inputs by distribution of control group
		cap drop time_investment material_investment
		foreach var of varlist fa_inv_time fa_inv_mat {
			sum `var' if treat==0, d 
			gen `var'_st = (`var'- `r(mean)')/`r(sd)'
			}
		
		rename fa_inv_time_st time_investment
        rename fa_inv_mat_st  material_investment 
		
***Appendix B.4.
    ***Figure B6
    global KT_quality "kt_size kt_subsidy kt_teacher_age kt_teacher_male kt_teacher_exp kt_teacher_salary kt_teacher_bachelor kt_teacher_training kt_ptr kt_totalroom kt_outdoor  kt_playroom kt_exersiceroom kt_dormitories kt_breakfast kt_readbooks kt_exercise kt_art kt_science kt_social kt_language"
	    cap drop kt_quality kt_quality_lb kt_quality_hb
	    *Table B4 EFA 
		paran $KT_quality
		pca $KT_quality
		scree,ci graphregion(color(white))
		graph export "$outdir/Appendix/FigureB6.png" ,replace
		
	    ***Table B5 factor analysis
		factor $KT_quality 
		predict kt_quality
	
	    ***Table B5
		gen  kt_quality_hb= kt_quality
		
		bysort town_id: egen min_quality=min(kt_quality_hb)
		replace kt_quality_hb= min_quality if kt_enroll==0 &town_id!=""
		
		gen  kt_quality_lb= kt_quality	
		
	    bysort town_id: egen max_quality=max(kt_quality_lb)
		replace kt_quality_lb= max_quality if kt_enroll==0 &town_id!=""

****Appendix C		
use "$datadir/choice_set.dta",clear


replace distance = distance/1000 /*change the unit to km*/

replace duration = duration/60 /*change the unit to mins*/

preserve
bysort hhid: egen min_distance=min(distance)
bysort hhid: egen min_duration=min(duration)
bysort hhid preschool_location: egen min_town_distance=min(distance)
bysort hhid preschool_location: egen min_town_duration=min(duration)

bysort hhid preschool_location:gen preschool_town_one=_n 
keep if preschool_town_one==1
drop if preschool_location==0


file open Table_C1 using "$outdir/Appendix/table_c1.txt", ///
		write text replace
		
	file write Table_C1 "varname" _tab  "Mean [SD]" _tab  _n

foreach X of varlist min_distance min_duration min_town_distance min_town_duration{

	summ `X' 
	
	local mean 	: display %-8.3f `r(mean)'
	local SD   	: display %-8.3f `r(sd)'
	
			
	foreach M in mean SD{
		local `M'_str = trim("``M''")
	}
		
	file write Table_C1 "`X'" _tab  "`mean_str'" _tab  _n
	file write Table_C1 "`X'" _tab  "`SD_str'" _tab  _n
	} 
	file close Table_C1 

restore

keep if actual_enroll==1

file open Table_C1 using "$outdir/Appendix/table_c1.txt", ///
		write text append
		
	file write Table_C1 "varname" _tab  "Mean [SD]" _tab  _n

foreach X of varlist distance duration {


	summ `X' 
	
	local mean 	: display %-8.3f `r(mean)'
	local SD   	: display %-8.3f `r(sd)'
	
			
	foreach M in mean SD{
		local `M'_str = trim("``M''")
	}
		
	file write Table_C1 "`X'" _tab  "`mean_str'" _tab  _n
	file write Table_C1 "`X'" _tab  "`SD_str'" _tab  _n
	} 
	file close Table_C1 


   
		
